#!/bin/bash
# Wrapper that invokes scripts/merged_jsonl.sh with a fixed input directory
# (MERGED_DIR) and a fixed output file (OUTFILE).
# Judging by the names, the helper presumably merges the resampled shards
# into a single JSONL file -- confirm against scripts/merged_jsonl.sh.
#
# NOTE(review): the helper is referenced through a relative path, so this
# script has to be launched from the directory that contains scripts/.
# SCRIPT_PATH is not used to locate it; confirm whether it should be.

# Project root; every other path below is derived from it.
SCRIPT_PATH=/share/projset/dsir7
INDEX_DIR="${SCRIPT_PATH}/indexes/qwen_tokenizer_3_300000"
DATA_DIR="${SCRIPT_PATH}/data"

# Settings referenced only by the disabled search.sh invocation below.
CONFIG_FILE="${INDEX_DIR}/feat_config.json"
RAW_DATASETS="${DATA_DIR}/flan2-filtered-merge.jsonl"
TARGET_DATASETS="${DATA_DIR}/llm-eval-v2-merge.jsonl"
NUM_TO_SAMPLE=3000000
MIN_EXAMPLE_LENGTH=0

# Arguments of the merge step that is actually executed.
MERGED_DIR="${INDEX_DIR}/flan2-filtered-merge_llm-eval-v2-merge_0.0_0/resampled"
OUTFILE="${INDEX_DIR}/merged_flan2-llmevalv2.jsonl"

# Disabled step, kept for reference (presumably the run that produced
# MERGED_DIR -- TODO confirm):
# bash scripts/search.sh "$CONFIG_FILE" "$RAW_DATASETS" "$TARGET_DATASETS" "$NUM_TO_SAMPLE" "$MIN_EXAMPLE_LENGTH"

# Quoted so the paths are passed as exactly one argument each.
bash scripts/merged_jsonl.sh "$MERGED_DIR" "$OUTFILE"